library(drake)
library(tidyverse)
loadd(data, data_trans, data_mice)

Look at NA values in the raw data (the counts below are the number of non-missing values per column)

# Per-column tally of non-missing entries in the raw data.
map_int(data, function(column) sum(!is.na(column)))
##                               iso3c                        n_amr_events 
##                                 190                                  59 
##                  n_amr_first_events                  health_expend_perc 
##                                  59                                 183 
##              migrant_pop_per_capita                          population 
##                                 190                                 190 
##                       ab_export_bin                      english_spoken 
##                                 162                                 190 
##               human_consumption_ddd                       livestock_pcu 
##                                  68                                 164 
##                ab_export_per_capita                ab_import_per_capita 
##                                  87                                 161 
## livestock_consumption_kg_per_capita                      gdp_per_capita 
##                                  31                                 190 
##         tourism_outbound_per_capita          tourism_inbound_per_capita 
##                                  44                                 111 
##                 pubcrawl_per_capita          promed_mentions_per_capita 
##                                 180                                 189
# Number of rows whose pubcrawl_per_capita is either zero or missing.
with(data, sum(is.na(pubcrawl_per_capita) | pubcrawl_per_capita == 0))
## [1] 33
# Number of rows whose promed_mentions_per_capita is either zero or missing.
with(
  data,
  sum(is.na(promed_mentions_per_capita) | promed_mentions_per_capita == 0)
)
## [1] 1
# Number of rows whose ab_export_per_capita is either zero or missing.
with(data, sum(is.na(ab_export_per_capita) | ab_export_per_capita == 0))
## [1] 103

Look at non-missing counts and distributions after NA processing

# Per-column tally of non-missing entries in the processed data.
map_int(data_trans, function(column) sum(!is.na(column)))
##                                  iso3c                           n_amr_events 
##                                    190                                    190 
##                     health_expend_perc              ln_migrant_pop_per_capita 
##                                    183                                    190 
##                          ln_population                          ab_export_bin 
##                                    190                                    190 
##                         english_spoken                  human_consumption_ddd 
##                                    190                                     68 
##                       ln_livestock_pcu                ln_ab_export_per_capita 
##                                    164                                    190 
##                ln_ab_import_per_capita ln_livestock_consumption_kg_per_capita 
##                                    161                                     31 
##                      ln_gdp_per_capita         ln_tourism_outbound_per_capita 
##                                    190                                     44 
##          ln_tourism_inbound_per_capita                 ln_pubcrawl_per_capita 
##                                    111                                    190 
##          ln_promed_mentions_per_capita 
##                                    190
# Confirm that no column of the processed data holds an infinite value
# (every entry of the result should be FALSE).
map_lgl(data_trans, function(column) any(is.infinite(column)))
##                                  iso3c                           n_amr_events 
##                                  FALSE                                  FALSE 
##                     health_expend_perc              ln_migrant_pop_per_capita 
##                                  FALSE                                  FALSE 
##                          ln_population                          ab_export_bin 
##                                  FALSE                                  FALSE 
##                         english_spoken                  human_consumption_ddd 
##                                  FALSE                                  FALSE 
##                       ln_livestock_pcu                ln_ab_export_per_capita 
##                                  FALSE                                  FALSE 
##                ln_ab_import_per_capita ln_livestock_consumption_kg_per_capita 
##                                  FALSE                                  FALSE 
##                      ln_gdp_per_capita         ln_tourism_outbound_per_capita 
##                                  FALSE                                  FALSE 
##          ln_tourism_inbound_per_capita                 ln_pubcrawl_per_capita 
##                                  FALSE                                  FALSE 
##          ln_promed_mentions_per_capita 
##                                  FALSE
# Histogram of every remaining column of data_trans, one facet per column with
# independent axis scales. iso3c, n_amr_events and english_spoken are dropped
# before plotting.
data_trans %>%
  select(-iso3c, -n_amr_events, -english_spoken) %>%
  # pivot_longer() replaces the superseded gather(); the key/value column names
  # are kept so the plotting code below reads the same columns as before.
  pivot_longer(everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(x = value)) +
  # bins = 30 is the ggplot2 default, stated explicitly so the plot no longer
  # emits the "pick better value with `binwidth`" message on every run.
  geom_histogram(bins = 30) +
  facet_wrap(~key, scales = "free")

# Pairwise Spearman correlation chart (with histograms on the diagonal) for
# the processed data, after dropping the columns listed below.
data_trans %>%
  dplyr::select(
    -c(
      iso3c,
      ln_livestock_pcu,
      ln_ab_import_per_capita,
      ab_export_bin,
      english_spoken
    )
  ) %>%
  PerformanceAnalytics::chart.Correlation(
    histogram = TRUE,
    pch = 19,
    method = "spearman"
  )

Look at imputed data

# Convergence check for the imputation object.
# On convergence, the different streams should be freely intermingled with one
# another, without showing any definite trends. Convergence is diagnosed when
# the variance between different sequences is no larger than the variance
# within each individual sequence.
plot(data_mice)

# Compare the imputations against the pre-imputation data (data_trans).
# show_imputes() is defined outside this file; `m` is read from the "m"
# element of data_mice -- presumably the number of imputations; confirm
# against the helper's definition.
show_imputes(
  data_mice,
  m = data_mice[["m"]],
  raw = data_trans
)

# Extract a completed data set from the imputation object and chart its
# pairwise Spearman correlations (histograms on the diagonal), excluding iso3c.
#
# complete() is namespace-qualified on purpose: library(tidyverse) attaches
# tidyr, which exports its own complete() generic with no method for an
# imputation object, so the bare call depends on package attach order.
# NOTE(review): with default arguments mice::complete() is documented to
# return only the first imputed data set -- confirm that is the intent here.
imp <- mice::complete(data_mice)
imp %>%
  dplyr::select(-iso3c) %>%
  PerformanceAnalytics::chart.Correlation(
    histogram = TRUE,
    pch = 19,
    method = "spearman"
  )